In [1]:
from IPython.display import IFrame, display, HTML

import pandas as pd
import numpy as np

from bokeh.models import ColumnDataSource, Plot, Circle, Range1d, LinearAxis, TapTool, HoverTool, Text
from bokeh.embed import file_html
from bokeh.plotting import vplot
from bokeh.resources import INLINE
from bokeh.models.actions import Callback
from bokeh.models.widgets import Slider

Get the Data


In [2]:
# The spreadsheet links below come from http://www.gapminder.org/data/
#
# The string literal that follows is disabled code kept for reference: it is
# the one-off routine that downloaded the three spreadsheets, kept only the
# years 1962 - 2013 and wrote each frame to a local HDF file. It is never
# executed here; the notebook reads those HDF files instead.

"""
population_url = "http://spreadsheets.google.com/pub?key=phAwcNAVuyj0XOoBL_n5tAQ&output=xls"
fertility_url = "http://spreadsheets.google.com/pub?key=phAwcNAVuyj0TAlJeCEzcGQ&output=xls"
life_expectancy_url = "http://spreadsheets.google.com/pub?key=tiAiXcrneZrUnnJ9dBU-PAw&output=xls"

def get_data(url):
    # Get the data from the url and return only 1962 - 2013
    df = pd.read_excel(url, index_col=0)
    df = df.unstack().unstack()
    df = df[(df.index >= 1962) & (df.index <= 2013)]
    df = df.unstack().unstack()    
    return df

fertility_df = get_data(fertility_url)
life_expectancy_df = get_data(life_expectancy_url)
population_df = get_data(population_url)

fertility_df.to_hdf('fertility_df.hdf', 'df')
life_expectancy_df.to_hdf('life_expectancy_df.hdf', 'df')
population_df.to_hdf('population_df.hdf', 'df')
"""
# Load the three cached frames (each stored under the key 'df'), in the same
# order the disabled routine wrote them.
fertility_df, life_expectancy_df, population_df = [
    pd.read_hdf(hdf_path, 'df')
    for hdf_path in ('fertility_df.hdf', 'life_expectancy_df.hdf', 'population_df.hdf')
]

In [3]:
# Keep only the countries (index labels) that appear in all three frames.
# Every frame is filtered, life_expectancy_df included, so the per-year frames
# assembled later do not pick up rows that exist in just one or two of them.
common_countries = (
    life_expectancy_df.index
    .intersection(fertility_df.index)
    .intersection(population_df.index)
)
fertility_df = fertility_df.loc[common_countries]
life_expectancy_df = life_expectancy_df.loc[common_countries]
population_df = population_df.loc[common_countries]

# Derive a marker size from population: the radius of a circle whose area is
# the population, scaled down by a fixed divisor.
size_divisor = 200
min_size = 3
population_df_size = np.sqrt(population_df / np.pi) / size_divisor
# Don't let a marker get too small. Missing populations also end up at
# min_size, because NaN >= min_size evaluates to False.
population_df_size = population_df_size.where(population_df_size >= min_size, min_size)

Build an HTML plot


In [4]:
# Build one ColumnDataSource per year, stored under the key '_<year>'.
# Each source holds three columns - 'fertility', 'life' and 'population'
# (the marker size) - indexed like the input frames.
sources = {}

years = list(fertility_df.columns)

for year in years:
    # `keys` supplies the column names, so the Series taken from the shared
    # frames are not renamed in place.
    new_df = pd.concat(
        [fertility_df[year], life_expectancy_df[year], population_df_size[year]],
        axis=1,
        keys=['fertility', 'life', 'population'],
    )
    sources['_' + str(year)] = ColumnDataSource(new_df)
    

# Fixed data ranges: x is fertility, y is life expectancy (see the glyph below).
xdr = Range1d(1, 8)
ydr = Range1d(20, 85)
plot = Plot(
    x_range=xdr,
    y_range=ydr,
    title="",
    plot_width=800,
    plot_height=400,
    outline_line_color=None,
    toolbar_location=None,
)

# The x axis goes under the plot and the y axis on its left; both are
# labelled so the figure can be read on its own.
xaxis = LinearAxis(axis_label="Fertility")
yaxis = LinearAxis(axis_label="Life expectancy")
plot.add_layout(xaxis, 'below')
plot.add_layout(yaxis, 'left')

# Hovering over a circle shows the row's index label.
tooltips = "@index"
plot.add_tools(HoverTool(tooltips=tooltips))

# The glyph gets its own source, initialised with a copy of the 1962 data.
# The slider callback overwrites renderer_source's data on every change; if
# this were the very object stored in sources['_1962'], the first slider move
# would destroy the 1962 data and the slider could never show that year again.
renderer_source = ColumnDataSource(data=dict(sources['_1962'].data))
highlighted = Circle(x='fertility', y='life', fill_color='#F6931F', line_color='#995a13', size='population')
plot.add_glyph(renderer_source, highlighted)

# dictionary_of_sources maps each year to the name of its source:
# {
#   1962: '_1962',
#   1963: '_1963',
#   ....
# }
# Its string form with the quotes stripped is a JavaScript object literal,
# js_source_array:
# '{1962: _1962, 1963: _1963, ....}'
#
# When this is spliced into the callback code and evaluated at runtime, the
# bare names _1962, _1963, ... resolve to the actual source objects that are
# passed to the callback as args.

dictionary_of_sources = {year: '_%s' % year for year in years}
js_source_array = str(dictionary_of_sources).replace("'", "")

# On every slider change: look up the source for the selected year and copy
# its data into the source the glyph is drawn from.
code = """
    var key = slider.get('value'),
        sources = %s,
        new_source_data = sources[key].get('data');
    renderer_source.set('data', new_source_data);
    renderer_source.trigger('change');
""" % js_source_array

# Hand the callback a copy of the mapping so that the entries added below
# cannot end up in `sources` itself.
callback = Callback(args=dict(sources), code=code)
# The initial value must lie inside [start, end] and match the year that is
# rendered first (the 1962 source).
slider = Slider(start=1962, end=2013, value=1962, step=1, title="Year", callback=callback)
# The callback code refers to `slider` and `renderer_source` by name, so both
# have to be registered as args as well.
callback.args["slider"] = slider
callback.args["renderer_source"] = renderer_source


# Combine the plot and the year slider into one layout, then render that
# layout to an HTML document string titled "gapminder" using the INLINE
# resources.
layout = vplot(plot, slider)
html = file_html(layout, INLINE, "gapminder")

In [5]:
# Show the generated HTML document as this cell's rich output.
display(HTML(html))


gapminder